## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.2.1     ✔ dplyr   1.1.2
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'lubridate'
## 
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## 
## 
## corrplot 0.92 loaded
## 
## 
## Attaching package: 'ggpp'
## 
## 
## The following object is masked from 'package:ggplot2':
## 
##     annotate
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## 
## Attaching package: 'lmerTest'
## 
## 
## The following object is masked from 'package:lme4':
## 
##     lmer
## 
## 
## The following object is masked from 'package:stats':
## 
##     step
## 
## 
## 
## Attaching package: 'maps'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     map
## 
## 
## ℹ Google's Terms of Service: ]8;;https://mapsplatform.google.com<https://mapsplatform.google.com>]8;;
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## 
## Loading required package: sp
## 
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired by the end of 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
## 
## 
## Attaching package: 'raster'
## 
## 
## The following object is masked from 'package:lme4':
## 
##     getData
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## rgeos version: 0.6-3, (SVN revision 696)
##  GEOS runtime version: 3.10.2-CAPI-1.16.0 
##  Please note that rgeos will be retired during October 2023,
## plan transition to sf or terra functions using GEOS at your earliest convenience.
## See https://r-spatial.org/r/2023/05/15/evolution4.html for details.
##  GEOS using OverlayNG
##  Linking to sp version: 1.6-0 
##  Polygon checking: TRUE 
## 
## 
## 
## Attaching package: 'rgeos'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     symdiff

Overview

This script pulls in cleaned stream temperature data from TASR sites (data cleaning completed in the teton_data_cleaning.Rmd script) and SNOTEL data from the Phillips Bench and Grand Targhee SNOTEL stations, then runs and tests models of august stream temperature as a function of both time and weather-related variables. Input datasets are:

Primary Sections are:

I. Read in and clean data II. Data checking and predictor selection III. Model testing and selection IV. Model visualizations

I. Read in and clean data

Read in and clean data:

#Source data:
source <- read.csv("source_info.csv")%>%
  rename(site = stream) #rename for merge

#Temperature data:
temp_data <- read.csv("Temperature/cleaned_full_datasets/temps_hourly.csv")%>%
  mutate(date1 = ymd(date1), 
         year = year(date1))%>%
  filter(!is.na(temp_c), !year == 2023)%>% #Drop missing data, 2023 data (no August data for 2023 yet). 
  merge(source, all = T)%>% #Add in source data
  group_by(site, full_name, stream_code, date1, year, lat, long)%>%
  summarise(t_xbar = mean(temp_c, na.rm = T), 
            t_max = max(temp_c, na.rm = T), 
            t_min = min(temp_c, na.rm = T), 
            t_range = t_max-t_min, 
            source = unique(source))%>% #calculate daily mean, min, and max temps for each site along with temp range
  filter(month(date1)==8)%>% #extract data for August only
  group_by(site)%>% #group by stream
  mutate(n_yrs = length(unique(year)))%>%
  ungroup()%>%
  filter(!n_yrs<3)
## `summarise()` has grouped output by 'site', 'full_name', 'stream_code',
## 'date1', 'year', 'lat'. You can override using the `.groups` argument.
#SNOTEL data:
snotel <- read.csv("teton_snotel_23.csv")%>%
  mutate(date1 = as.Date(date, format = "%m/%d/%y"), #recode date column as R date
         airtemp_c = as.numeric(airtemp_c))%>% #recode airtemp as numeric
  group_by(date1)%>% #group by date
  summarise(swe_xbar = mean(swe_cm), #calculate mean SWE and airtemp across both stations
            airtemp_xbar = mean(airtemp_c, na.rm = T))%>%
  mutate(mo = month(date1), year = year(date1))%>% #Add month and year cols
  group_by(year)%>% #group by year
  mutate(swe_max = max(swe_xbar), #extract max SWE and add to new col
         swe_max_date = date1[which.max(swe_xbar)], #extract max swe date and add to new column
         swe_zero_date = date1[which.min(swe_xbar)][1])%>% #extract first day with swe = 0 for each year and add to new column
  ungroup()%>% #ungroup to get all columns back
  filter(mo == 8|mo ==7)%>% #extract July & August temperatures
  group_by(year)%>% #group by year
  summarise(airtemp_summer = mean(airtemp_xbar), #calculate mean july-aug air temp
            swe_max = unique(swe_max), 
            swe_max_date = ymd(unique(swe_max_date)),  
            swe_zero_date =ymd(unique(swe_zero_date)))%>% #keep max SWE and, max swe date, and swe zero date
  mutate(melt_days = as.numeric(swe_zero_date-swe_max_date), #calculate melt days (swe zero date minus max swe date)
         swe_max_doy = yday(swe_max_date), 
         swe_zero_doy = yday(swe_zero_date)) #add columns for swe max and swe zero day of year - easier to use as predictor. 

Merge stream and SNOTEL data:

temp_snotel <- merge(temp_data, snotel)

II. Data checking and predictor selection

A. Checking distribution of response variable (august stream temps):

Pivot data for easier plotting:

temp_pivot <- temp_snotel%>%
  pivot_longer(!c(year:long, source:swe_zero_doy), names_to = "temp_type", values_to = "stream_temp")%>% #Pivot to get column for temperature, row for each temp type (min, max, mean, range) for each day
  mutate(transform = log10(stream_temp))%>%
  mutate(yr_scale = scale(year), #create centered/scaled version of each predictor varaible
         sweMax_scale = scale(swe_max), 
         sweMaxDoy_scale = scale(swe_max_doy), 
         sweZeroDoy_scale = scale(swe_zero_doy), 
         meltDays_scale = scale(melt_days), 
         summerAirtemp_scale = scale(airtemp_summer))

Quick visualizations - how are the data distributed across all groups and years?

ggplot(temp_pivot,aes(x = stream_temp, fill = factor(source)))+ # variable = stream temp
  geom_histogram()+ #draw histogram
  facet_wrap(~temp_type, scales = "free_x")+ #facet by temperature type
  theme_classic() #get rid of extra formatting
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

which(temp_pivot$stream_temp<=0)
##  [1]  187  380  408  444 1228 1419 1732 1739 1924 2040 2620 2748 3379 3467 3539
## [16] 3640 3683 3692 3720 3740 3752 4271 4275 4308 4419 4423 4436 4556 4572 4672
## [31] 4812 4888 5004 5008 5176 5255 5364 5384 5644 5820 5968 5984 6064 6108 6216
## [46] 6364 6420 6624 6644 6848 6888
temp_pivot[which(temp_pivot$stream_temp<=0),]
## # A tibble: 51 × 25
##     year site       full_name    stream_code date1        lat  long source n_yrs
##    <dbl> <chr>      <chr>        <chr>       <date>     <dbl> <dbl> <chr>  <int>
##  1  2015 s_cascade  South Casca… SC          2015-08-23  43.7 -111. sub_i…     5
##  2  2016 n_teton    North Teton  NT          2016-08-04  43.8 -111. snowm…     8
##  3  2016 s_cascade  South Casca… SC          2016-08-02  43.7 -111. sub_i…     5
##  4  2016 windcave   Wind Cave    WC          2016-08-05  43.7 -111. sub_i…     6
##  5  2017 s_ak_basin S. AK Basin  AK          2017-08-11  43.7 -111. sub_i…     6
##  6  2018 s_teton    South Teton  ST          2018-08-27  43.7 -111. snowm…     6
##  7  2018 mid_teton  Middle Teton MT          2018-08-01  43.7 -111. glaci…     7
##  8  2018 grizzly    Grizzly      GR          2018-08-27  43.8 -111. snowm…     5
##  9  2018 s_teton    South Teton  ST          2018-08-03  43.7 -111. snowm…     6
## 10  2018 delta      Delta        DE          2018-08-01  43.7 -111. glaci…     6
## # ℹ 41 more rows
## # ℹ 16 more variables: airtemp_summer <dbl>, swe_max <dbl>,
## #   swe_max_date <date>, swe_zero_date <date>, melt_days <dbl>,
## #   swe_max_doy <dbl>, swe_zero_doy <dbl>, temp_type <chr>, stream_temp <dbl>,
## #   transform <dbl>, yr_scale <dbl[,1]>, sweMax_scale <dbl[,1]>,
## #   sweMaxDoy_scale <dbl[,1]>, sweZeroDoy_scale <dbl[,1]>,
## #   meltDays_scale <dbl[,1]>, summerAirtemp_scale <dbl[,1]>

Minimum and range are left-skewed (especially range); mean and max look bimodal, especially for snowmelt. Same patterns within years.

Also have 51 0’s in the temperature column (either temp range or minimum)

Testing Log transform:

ggplot(temp_pivot,aes(x = transform, fill = factor(source)))+ # variable = stream temp
  geom_histogram()+ #draw histogram
  facet_wrap(~temp_type, scales = "free_x")+ #facet by temperature type
  theme_classic() #get rid of extra formatting
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 51 rows containing non-finite values (stat_bin).

Log transform doesn’t really help - just makes t_min and t_range right-skewed, still have weird stuff going on with max and mean.

B. Predictor variable selection

Possible predictor variables are:

  • Year (Is august stream temperature changing over time?)
  • Max SWE (Does maximum SWE affect august stream temperature?)
  • Max SWE date (Does the timing of maximum SWE affect august stream temperature?)
  • SWE Zero date (Does the date that sites were snow-free affect august stream temperature?)
  • Melt days (Does how fast the snowpack melted affect august stream temperature?)
  • Summer (July-August) air temperature (Does air temperature during july/august affect summer stream temperature?)

i. Checking relationships between predictors and august stream temp:

Checking relationships between predictors (centered/scaled) and each response variable; colors for each source type. For simplicity, convert data to get one column for predictor type, one column for predictor value:

Data prep:

pred_pivot <- temp_pivot%>%
  dplyr::select(-c(airtemp_summer:swe_zero_doy), -year)%>% #drop swe max and zero date columns
  pivot_longer(!c(site:n_yrs, temp_type, stream_temp, transform), names_to = "predictor", values_to = "pred_value") #pivot to get columns for predictor name and predictor value; rows for each predictor x temp type x stream x year combo

Plotting for each potential model:

Min temperature:

p1 <- ggplot(filter(pred_pivot, temp_type == "t_min"), aes(x = pred_value, y = stream_temp, color = source))+ #data = snowmelt only; x = predictor value, y = stream temp, color = stream temperature type
  geom_point()+ #scatterplot
  geom_smooth()+ #fit smooth fxn to data
  facet_wrap(~predictor, scales = "free")+ #facet by predictor type; free scales. 
  labs(title = "t_min")
p1
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Relationships:

  • Year appears to be roughly linear for glacier-fed streams; swe_max maybe linear for snowmelt-fed streams.
  • Most predictors look flat-ish, especially for glacier and ice-fed streams.

Mean temperature:

p2 <- ggplot(filter(pred_pivot, temp_type == "t_xbar"), aes(x = pred_value, y = stream_temp, color = source))+ #data = snowmelt only; x = predictor value, y = stream temp, color = stream temperature type
  geom_point()+ #scatterplot
  geom_smooth()+ #fit smooth fxn to data
  facet_wrap(~predictor, scales = "free")+ #facet by predictor type; free scales. 
  labs(title = "t_xbar")
p2
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Relationships:

  • Swe max appears to be roughly linear (-), no obvious trends in other variables - appear non-linear.

T_max:

p3 <- ggplot(filter(pred_pivot, temp_type == "t_max"), aes(x = pred_value, y = stream_temp, color = source))+ #data = snowmelt only; x = predictor value, y = stream temp, color = stream temperature type
  geom_point()+ #scatterplot
  geom_smooth()+ #fit smooth fxn to data
  facet_wrap(~predictor, scales = "free")+ #facet by predictor type; free scales. 
  labs(title = "t_max")
p3
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Relationships:

  • Swe max again maybe linear (-)

T Range

p4 <- ggplot(filter(pred_pivot, temp_type == "t_range"), aes(x = pred_value, y = stream_temp, color = source))+ #data = snowmelt only; x = predictor value, y = stream temp, color = stream temperature type
  geom_point()+ #scatterplot
  geom_smooth()+ #fit smooth fxn to data
  facet_wrap(~predictor, scales = "free")+ #facet by predictor type; free scales. 
  labs(title = "t_range")
p4
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Combine and save:

predictor.grid <- p1/p2/p3/p4

ggsave("Temperature/plots/model_figs/predictor_grid.jpg", predictor.grid, device = "jpeg", units = "in", dpi = "retina", height = 16, width = 8.5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Conclusions:

If modeling year and swe max, linear assumption is probably ok. If including other varaibles, may need to use a non-linear method.

ii. Checking predictor correlations:

Extract predictor variables:

predictors <- temp_snotel%>%
  dplyr::select(year, airtemp_summer, swe_max, melt_days, swe_max_doy, swe_zero_doy)%>% #extract predictor variables
  distinct() #remove duplicate values

Calculate correlation matrix and melt for plotting:

cormat <-round(cor(predictors), 2) #create correlation matrix with rounded values
cormat[upper.tri(cormat)]<- NA #remove upper triangle values for plotting
cormat_melt<-reshape2::melt(cormat, na.rm = T)%>% #melt for plotting
  arrange(desc(value))
cormat_melt #check output
##              Var1           Var2 value
## 1            year           year  1.00
## 2  airtemp_summer airtemp_summer  1.00
## 3         swe_max        swe_max  1.00
## 4       melt_days      melt_days  1.00
## 5     swe_max_doy    swe_max_doy  1.00
## 6    swe_zero_doy   swe_zero_doy  1.00
## 7  airtemp_summer           year  0.75
## 8     swe_max_doy airtemp_summer  0.72
## 9    swe_zero_doy      melt_days  0.59
## 10    swe_max_doy           year  0.58
## 11   swe_zero_doy        swe_max  0.55
## 12   swe_zero_doy airtemp_summer  0.42
## 13    swe_max_doy        swe_max  0.26
## 14   swe_zero_doy           year  0.22
## 15        swe_max airtemp_summer  0.21
## 16      melt_days        swe_max  0.20
## 17   swe_zero_doy    swe_max_doy  0.19
## 18        swe_max           year -0.18
## 19      melt_days airtemp_summer -0.28
## 20      melt_days           year -0.32
## 21    swe_max_doy      melt_days -0.68

Plot correlation matrix:

ggplot(cormat_melt, aes(x = Var1, y = Var2, fill = value))+ #plotting: fill = correlation value, axes = varaibles
  geom_tile(color = "white")+ #tile graph to get "pixels" for each predictor combo
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
   name="Pearson\nCorrelation") + #gradient fill based on correlation
  theme_minimal()+ #removing xtra formatting
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 10, hjust = 1), 
    axis.text.y = element_text(
    size = 10),
    axis.title = element_blank())+ #adjusting axis text formatting
 coord_fixed()+ #fixed coordinate system
  geom_text(aes(label = value)) #adding labels with correlation values to each tile. 

Variables with Correlation > 0.7:

  • year ~ airtemp_summer (0.75)
  • swe_max_doy ~ airtemp_summer (0.72)

Variables with correlation between 0.6 & 0.7

  • swe_max_doy ~ melt_days (0.68)

Variables with correlation between 0.5 & 0.6

  • swe_zero_doy ~ melt_days (0.59)
  • swe_max_doy ~ year (0.58)
  • swe_zero_doy ~ swe_max (0.55)

Takeaways:

  • Probably can’t include year and summer air temp or swe_max_doy and summer airtemp in the same model.

Possible overall models:

  • year+swe_max_doy+swe_zero_doy+swe_max+melt_days
  • airtemp_summer+swe_zero_doy+swe_max+melt_days

III. Model testing and selection

i. Linear Mixed Model - Change in overall stream temperature over time:

Random effects structure:

  • Two levels of nesting: Data is nested under Sites, which are nested under Sources. To account for this structure, include random intercepts for Site and Source: (1|site)+(1|source)

Fixed effect: year

Overall model: temperature (min, mean, or max) ~ year + (1|source)+(1|site)

Nest data by temperature type:

temp_nest <- temp_pivot%>%
  nest(data = -temp_type)

Fit models for each temperature type (NOT including year*source interaction):

yr.lmms <- temp_nest %>%
  mutate(model = purrr::map(data, ~lmerTest::lmer(stream_temp~year+(1|source)+(1|site), data = .)%>% #run a model of temp~year for each site (referenced by "."); store results in new column called "model"
                     broom.mixed::tidy(effects="fixed", conf.int = TRUE)))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
  unnest(model)%>% #unnest model column to get rows for each site, columns for model metrics
  mutate(p.value = round(p.value, 4))%>%
  filter(!term == "(Intercept)")%>%
  dplyr::select(temp_type, term, estimate, std.error, statistic, df, p.value)%>%
  rename(predictor_name = term)
yr.lmms
## # A tibble: 4 × 7
##   temp_type predictor_name estimate std.error statistic    df p.value
##   <chr>     <chr>             <dbl>     <dbl>     <dbl> <dbl>   <dbl>
## 1 t_xbar    year             0.213     0.0189     11.3  1728.   0    
## 2 t_max     year             0.194     0.0322      6.02 1727.   0    
## 3 t_min     year             0.222     0.0145     15.4  1724.   0    
## 4 t_range   year            -0.0296    0.0268     -1.10 1723.   0.270

Significant increase in mean, min, and max temperature over time across all sites at the P<0.001 level. Effect sizes:

  • Maximum temperatures increase by 0.19 degrees for every 1 unit increase in year
  • Mean temperatures increase by 0.21 degrees for every 1 unit increase in year
  • Minimum temperatures increase by 0.22 degrees for every 1 unit increase in year

No change in daily temperature change.

Model including year x source interaction:

Overall model: temp(mean, min, max, or range) = year + year*source + (1|source) + (1|ite)

yr.lmms <- temp_nest %>%
  mutate(model = purrr::map(data, ~lmerTest::lmer(stream_temp~year+year*source+(1|source)+(1|site), data = .)%>% #run a model of temp~year for each site (referenced by "."); store results in new column called "model"
                     broom.mixed::tidy(effects="fixed", conf.int = TRUE)))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
  unnest(model)%>% #unnest model column to get rows for each site, columns for model metrics
  mutate(p.value = round(p.value, 4))%>%
  filter(!term == "(Intercept)")%>%
  dplyr::select(temp_type, term, estimate, std.error, statistic, df, p.value)%>%
  rename(predictor_name = term)
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `model = purrr::map(...)`.
## Caused by warning in `checkConv()`:
## ! unable to evaluate scaled gradient
## ℹ Run ]8;;ide:run:dplyr::last_dplyr_warnings()dplyr::last_dplyr_warnings()]8;; to see the 2 remaining warnings.
yr.lmms
## # A tibble: 20 × 7
##    temp_type predictor_name         estimate std.error statistic      df p.value
##    <chr>     <chr>                     <dbl>     <dbl>     <dbl>   <dbl>   <dbl>
##  1 t_xbar    year                    0.0776     0.0328     2.37  1.58e+3  0.0181
##  2 t_xbar    sourcesnowmelt       -971.        90.0      -10.8   5.15e-1  0.188 
##  3 t_xbar    sourcesub_ice         199.        89.9        2.21  5.13e-1  0.419 
##  4 t_xbar    year:sourcesnowmelt     0.483      0.0445    10.9   1.70e+3  0     
##  5 t_xbar    year:sourcesub_ice     -0.0985     0.0445    -2.22  1.67e+3  0.0268
##  6 t_max     year                   -0.00593    0.0574    -0.103 7.74e+2  0.918 
##  7 t_max     sourcesnowmelt      -1265.       158.        -8.02  2.32e-1  0.452 
##  8 t_max     sourcesub_ice         126.       158.         0.799 2.32e-1  0.756 
##  9 t_max     year:sourcesnowmelt     0.631      0.0780     8.09  1.25e+3  0     
## 10 t_max     year:sourcesub_ice     -0.0623     0.0779    -0.800 1.36e+3  0.424 
## 11 t_min     year                    0.122      0.0248     4.92  1.72e+3  0     
## 12 t_min     sourcesnowmelt       -785.        67.9      -11.6   1.72e+3  0     
## 13 t_min     sourcesub_ice         211.        67.9        3.11  1.72e+3  0.0019
## 14 t_min     year:sourcesnowmelt     0.390      0.0336    11.6   1.72e+3  0     
## 15 t_min     year:sourcesub_ice     -0.105      0.0336    -3.11  1.72e+3  0.0019
## 16 t_range   year                   -0.128      0.0491    -2.61  1.44e+3  0.0091
## 17 t_range   sourcesnowmelt       -479.       135.        -3.56  1.41e+0  0.115 
## 18 t_range   sourcesub_ice         -82.2      135.        -0.611 1.40e+0  0.626 
## 19 t_range   year:sourcesnowmelt     0.240      0.0667     3.60  1.61e+3  0.0003
## 20 t_range   year:sourcesub_ice      0.0407     0.0666     0.611 1.65e+3  0.541

Note for interpretation - reference condition = glacier-fed streams

t_xbar:

  • Significant change (+0.07 deg/year) in mean august temps for glacier-fed streams
  • intercept for august stream temperatures were not significantly different between glacier-fed, snowmelt-fed, and subterranean ice-fed streams.
  • Rate of temp change for snowmelt-fed streams was significantly higher (+0.56 deg/year) than for glacier-fed streams.
  • Rate of temp change for subterranean-ice fed streams was significantly lower (-0.023 deg/year) than for glacier-fed streams
  • Overall: Glacier-fed and snowmelt-fed streams have significantly higher mean august temperatures; subterranean ice-fed streams have significantly lower august temperatures.

t_max:

  • No significant change in maxiumum temperatures for glacier-fed or subterranean ice-fed streams.
  • Significant increase in maxiumum temperatures (+0.63 deg/year) for snowmelt-fed streams.
  • No significant differences in intercepts for max temp for different source types.
  • Overall: maximum temperatures are significantly higher in snowmelt-fed streams, but haven’t changed in glacier or sub-ice streams.

t_min:

  • Significant increase in min august temps for glacier fed streams (+0.12 deg/year)
  • Intercept for minimum snowmelt-fed august temp is significantly lower than glacier-fed
  • Intercept for minimum sub ice-fed august temp is significantly higher than glacier-fed
  • Minimum temps have increased more in Snowmelt-fed streams (+0.5 deg/year) than in glacier-fed streams
  • Minimum temps have increased less in subterranean ice-fed streams (+0.02 deg/year)
  • Overall: Minimum temperatures have increased in all three source types, with by far the largest increases in snowmelt and glacier-fed streams. Minimum temp increases are an order of magnitude smaller in sub-ice streams than in glacier or snowmelt streams.

t_range:

  • Significant decrease in t_range in glacier-fed streams (-0.13 deg/year)
  • No differences in intercept for t_rage between source types
  • Slope for t_range for snowmelt-fed streams is significantly different from glacier-fed streams (+0.11 deg/year)
  • Slope for t_range for sub-ice streams is not significantly different from glacier-fed streams (i.e. also a decrease of -0.13 deg/yr)
  • Overall: Daily temperature fluctuations have decreased in glacier and sub-ice streams over time; increased in snowmelt-fed streams over time.

a. Model Testing

For loop to create model objects for each temperature type and draw residual histograms, qq plots, and residual vs fitted plots for each temperature type. Overall model is:

Overall model: temp(mean, min, max, or range) = year + year*source + (1|source) + (1|ite)

temp.types <- unique(temp_pivot$temp_type) #vector of all temp types
models <- list() #empty list to recieve models
plots <- list() #empty list to recieve diagnostic plots

for(i in 1:length(temp.types)){
  d <- temp_pivot%>%
    filter(temp_type == temp.types[i]) #extract rows with first temp type from list
  
  mod <- lmer(stream_temp~ #response variable
                year+(year*source)+ #fixed effects (including year x source interaction)
                (1|source)+(1|site), #random effects
              data = d) #data source
  
  models[[i]]<-mod #save model object in model list
  
  df_mod <- broom.mixed::augment(mod) #augment model and convert to df for model diagnostic plots
  df_mod[".stdresid"] <- resid(mod, type = "pearson") #calculate residuals for model
  
  resid<-ggplot(df_mod, aes(x = .fitted, y = .resid))+ #plot fitted vs residuals
    geom_point()+ #points
    geom_hline(yintercept = 0)+ #horizontal line at y = 0
    labs(title = paste("Residual vs Fitted", temp.types[i], sep = " - ")) #axis labels
  
  qq<- ggplot(df_mod, aes(sample = .stdresid)) + #qq plot
  geom_qq() +
  geom_qq_line()+ #add points and lines
    labs(title = paste("QQ Plot", temp.types[i], sep = " - ")) #axis labels
  
  hist<- ggplot(df_mod, aes(x = .resid))+
    geom_histogram()+ #draw histogram of residuals
    labs(title = paste("Residual Histogram", temp.types[i], "= year+(year*source)+(1|source)+(1|site)", sep = " ")) #axis labels
    
  
  plots[[i]]<-(resid|qq)/hist #store residual + qq plot in plots list
  
  print(paste("finished", temp.types[i], sep = " "))
}
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## [1] "finished t_xbar"
## [1] "finished t_max"
## Warning: Model failed to converge with 1 negative eigenvalue: -5.1e-07
## [1] "finished t_min"
## [1] "finished t_range"

Individual model summaries:

models[[1]]
## Linear mixed model fit by REML ['lmerModLmerTest']
## Formula: stream_temp ~ year + (year * source) + (1 | source) + (1 | site)
##    Data: d
## REML criterion at convergence: 6039.945
## Random effects:
##  Groups   Name        Std.Dev.
##  site     (Intercept) 0.8216  
##  source   (Intercept) 3.2072  
##  Residual             1.3586  
## Number of obs: 1733, groups:  site, 13; source, 3
## Fixed Effects:
##         (Intercept)                 year       sourcesnowmelt  
##          -153.78969              0.07756           -970.78453  
##       sourcesub_ice  year:sourcesnowmelt   year:sourcesub_ice  
##           198.92137              0.48313             -0.09853  
## optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 2 lme4 warnings
models[[2]]
## Linear mixed model fit by REML ['lmerModLmerTest']
## Formula: stream_temp ~ year + (year * source) + (1 | source) + (1 | site)
##    Data: d
## REML criterion at convergence: 7977.107
## Random effects:
##  Groups   Name        Std.Dev.
##  site     (Intercept) 1.487   
##  source   (Intercept) 5.893   
##  Residual             2.380   
## Number of obs: 1733, groups:  site, 13; source, 3
## Fixed Effects:
##         (Intercept)                 year       sourcesnowmelt  
##           1.621e+01           -5.929e-03           -1.265e+03  
##       sourcesub_ice  year:sourcesnowmelt   year:sourcesub_ice  
##           1.258e+02            6.305e-01           -6.234e-02
models[[3]]
## Linear mixed model fit by REML ['lmerModLmerTest']
## Formula: stream_temp ~ year + (year * source) + (1 | source) + (1 | site)
##    Data: d
## REML criterion at convergence: 5077.39
## Random effects:
##  Groups   Name        Std.Dev.
##  site     (Intercept) 0.9618  
##  source   (Intercept) 1.7975  
##  Residual             1.0256  
## Number of obs: 1733, groups:  site, 13; source, 3
## Fixed Effects:
##         (Intercept)                 year       sourcesnowmelt  
##           -244.0024               0.1219            -785.0235  
##       sourcesub_ice  year:sourcesnowmelt   year:sourcesub_ice  
##            211.0062               0.3900              -0.1045
models[[4]]
## Linear mixed model fit by REML ['lmerModLmerTest']
## Formula: stream_temp ~ year + (year * source) + (1 | source) + (1 | site)
##    Data: d
## REML criterion at convergence: 7442.001
## Random effects:
##  Groups   Name        Std.Dev.
##  site     (Intercept) 1.882   
##  source   (Intercept) 3.716   
##  Residual             2.034   
## Number of obs: 1733, groups:  site, 13; source, 3
## Fixed Effects:
##         (Intercept)                 year       sourcesnowmelt  
##           261.24228             -0.12829           -479.02364  
##       sourcesub_ice  year:sourcesnowmelt   year:sourcesub_ice  
##           -82.23986              0.24007              0.04073

Check diagnostic plots:

plots[[1]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

plots[[2]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

plots[[3]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

plots[[4]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

None of the models look particularly good - residuals vs fitted look ok, but qq plots do not follow lines. Residual histograms not normal.

b. Model Vizualization:

Scatterplot of change in each temperature variable; x = year, y = temp, faceted by temp type, color = source

source.pal <- c("#C26ED6", "#F89225", "#76D96F")
source.names <- c("Glacier", "Snowmelt", "Icy Seep")

temp.yr <- ggplot(temp_pivot, aes(x = year, y = stream_temp, color = source))+
  geom_point(alpha = 0.4)+
  geom_smooth(method = "lm")+
  facet_wrap(~temp_type, scales = "free", nrow = 4, ncol = 1)+
  theme_classic()+
  labs(x = "Year", y = "Temperature, C", color = "Source", title = "Model: temperature = year+(year*source)+(1|source)+(1|site)")+
  scale_color_manual(values = source.pal, labels = source.names)+
  theme(legend.position = "bottom", 
        strip.text = element_text(size = 10, face = "bold"), 
        axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10, face = "bold"))
temp.yr
## `geom_smooth()` using formula 'y ~ x'

save:

ggsave("Temperature/plots/model_figs/temp_yr.jpg", temp.yr, device = "jpeg", units = "in", dpi = "retina", height = 11, width = 8.5)
## `geom_smooth()` using formula 'y ~ x'

iii. Linear Mixed models testing changes in temperature over time for each source type (seperate models):

Data prep and model fitting:

source_nest <- temp_pivot%>%
  nest(data = -c(source, temp_type)) #nest data by source and temperature type

source.yr.lmms <- source_nest %>%
  mutate(model = purrr::map(data, ~lmerTest::lmer(stream_temp~year+(1|site), data = .)%>% #run a model of temp~year for each site (referenced by "."); store results in new column called "model"
                     broom.mixed::tidy(effects="fixed", conf.int = TRUE)))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
  unnest(model)%>% #unnest model column to get rows for each site, columns for model metrics
  mutate(p.value = round(p.value, 4))%>%
  filter(!term == "(Intercept)")%>%
  dplyr::select(source, temp_type, term, estimate, std.error, statistic, df, p.value)%>% #drop extra columns
  rename(predictor_name = term) #rename for easier reading

source.yr.lmms
## # A tibble: 12 × 8
##    source   temp_type predictor_name estimate std.error statistic    df p.value
##    <chr>    <chr>     <chr>             <dbl>     <dbl>     <dbl> <dbl>   <dbl>
##  1 snowmelt t_xbar    year            0.562     0.0438     12.8    731.  0     
##  2 snowmelt t_max     year            0.628     0.0745      8.43   730.  0     
##  3 snowmelt t_min     year            0.512     0.0332     15.4    728.  0     
##  4 snowmelt t_range   year            0.112     0.0620      1.81   728.  0.0706
##  5 sub_ice  t_xbar    year           -0.0215    0.0151     -1.43   437.  0.155 
##  6 sub_ice  t_max     year           -0.0696    0.0357     -1.95   437.  0.0521
##  7 sub_ice  t_min     year            0.0176    0.00837     2.11   435.  0.0358
##  8 sub_ice  t_range   year           -0.0886    0.0345     -2.57   436.  0.0106
##  9 glacier  t_xbar    year            0.0760    0.0109      7.00   555.  0     
## 10 glacier  t_max     year           -0.00677   0.0193     -0.350  555.  0.727 
## 11 glacier  t_min     year            0.121     0.0107     11.3    555.  0     
## 12 glacier  t_range   year           -0.128     0.0191     -6.70   555.  0

Results:

  • Snowmelt: Significant increase in minimum, mean, and maximum temperatures; marginal increase in temp range (p=0.07)
    • Summary: August stream temperatures in snowmelt-fed streams have increased significantly, with the largest increases occurring in maximum temperatures. The magnitude of daily temperature fluctuations has not changed.
  • Sub_ice: No change in mean temperature, marginal decrease in max temperature (p=0.052), significant increase in min temperature, significant decrease in temp range
    • Summary: August temperatures in subterranean ice-fed streams have not changed on average, but maximum temperatures have decreased and minimum temperatures have increased, leading to a reduction in the magnitude of daily temperature fluctuations.
  • Glacier: Significant increase in average temperature, no change in max temperature, significant increase in minimum temperature, significant decrease in temp range
    • Summary: August temperatures in glacier-fed streams have gotten significantly warmer on average. Maximum daily tempreatures have not changed, but minimum daily temperatures have increased, leading to a significant reduction in the magnitude of daily temperature fluctuations.

Bottom Line: All streams have experienced some warming, but exist on a continuum - snowmelt streams have warmed uniformly (higher min, mean, and max), but daily temperature fluctuations remain about the same. Glacier fed streams have higher minimum and mean temperatures, but maximum temperatures have not changed. Subterranean ice fed streams have seen the least change, with the only significant increase being in daily minimum temperature. Both glacier fed and subterranean ice fed streams have seen a reduction in the magnitude of daily temperature fluctuations due to increasing minimum temperatures and stable (or maybe even decreasing for sub ice) maximum temperatures.

ii. Linear Mixed Models - Stream temperature as a function of SNOTEL variables:

Random effects structure:

  • Two levels of nesting: Data is nested under Sites, which are nested under Sources. To account for this structure, include random intercepts for Site and Source: (1|site)+(1|source)
  • In addition, all data is crossed with Year - Data within each year aren’t hierarchical, but may still be correlated. To account for this, include random intercept for year: (1|year)

Fixed effects:

Possibilities are: airtemp_summer, swe_zero_doy, swe_max, melt_days, and swe_max_doy. airtemp_summer and swe_max_doy are strongly correlated (>0.7), so one needs to be dropped - swe_max_doy is also strongly correlated with melt_days, so dropping swe_max_doy allows more other variables to remain in the model. In addition, the timing of maximum snowpack is probably less relevant than when that snowpack melted out, which is captured by swe_zero_doy and melt_days. This leaves a fixed effects structure of:

airtemp_summer+swe_zero_doy+swe_max+melt_days (Using all centered/scaled variables)

Overall model (initial):

stream_temp ~ summerAirtemp_scale+sweZeroDoy_scale+sweMax_scale+meltDays_scale+(1|source)+(1|site)+(1|year)

Fit models and return summaries for each temperature type:

snow.lmms <- temp_nest %>%
  mutate(model = purrr::map(data, ~lmerTest::lmer(stream_temp~summerAirtemp_scale+sweZeroDoy_scale+sweMax_scale+meltDays_scale+(1|source)+(1|site)+(1|year), data = .)%>% #run a model of temp~snotel vars for each site (referenced by "."); store results in new column called "model"
                     broom.mixed::tidy(effects="fixed", conf.int = TRUE)))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
  unnest(model)%>% #unnest model column to get rows for each site, columns for model metrics
  mutate(p.value = round(p.value, 4))%>%
  filter(!term == "(Intercept)")%>%
  dplyr::select(temp_type, term, estimate, std.error, statistic, df, p.value)%>%
  rename(predictor_name = term)%>%
  arrange(p.value)
snow.lmms
## # A tibble: 16 × 7
##    temp_type predictor_name      estimate std.error statistic    df p.value
##    <chr>     <chr>                  <dbl>     <dbl>     <dbl> <dbl>   <dbl>
##  1 t_min     sweMax_scale         -0.527      0.137   -3.84    2.93  0.0325
##  2 t_xbar    sweMax_scale         -0.593      0.175   -3.40    3.11  0.0402
##  3 t_max     sweMax_scale         -0.682      0.286   -2.39    2.80  0.103 
##  4 t_min     summerAirtemp_scale   0.144      0.163    0.888   3.19  0.436 
##  5 t_range   sweMax_scale         -0.155      0.257   -0.603   2.40  0.598 
##  6 t_range   summerAirtemp_scale  -0.162      0.305   -0.531   2.64  0.637 
##  7 t_range   meltDays_scale        0.189      0.467    0.405   2.62  0.716 
##  8 t_min     meltDays_scale       -0.0970     0.249   -0.389   3.18  0.722 
##  9 t_xbar    summerAirtemp_scale   0.0634     0.207    0.306   3.41  0.777 
## 10 t_xbar    meltDays_scale       -0.0630     0.318   -0.198   3.39  0.854 
## 11 t_max     meltDays_scale        0.0964     0.522    0.185   3.07  0.865 
## 12 t_range   sweZeroDoy_scale      0.0640     0.510    0.125   2.57  0.909 
## 13 t_max     sweZeroDoy_scale      0.0466     0.570    0.0818  3.01  0.940 
## 14 t_min     sweZeroDoy_scale     -0.0128     0.273   -0.0469  3.12  0.965 
## 15 t_max     summerAirtemp_scale  -0.0145     0.341   -0.0424  3.10  0.969 
## 16 t_xbar    sweZeroDoy_scale      0.0143     0.347    0.0412  3.32  0.970

In overall model, only significant predictor is swe_max - higher maximum swe is associated with lower minimum and average temperatures across all stream types.

a. Model testing/validation

For loop to create model objects and diagnostic plots for each temp model:

st.models <- list() #empty list to recieve models
st.plots <- list() #empty list to recieve diagnostic plots

for(i in 1:length(temp.types)){
  d <- temp_pivot%>%
    filter(temp_type == temp.types[i]) #extract rows with first temp type from list
  
  mod <- lmer(stream_temp~ #response variable
                summerAirtemp_scale+sweZeroDoy_scale+sweMax_scale+meltDays_scale+ #fixed effects 
                (1|source)+(1|site), #random effects
              data = d) #data source
  
  st.models[[i]]<-mod #save model object in model list
  
  df_mod <- broom.mixed::augment(mod) #augment model and convert to df for model diagnostic plots
  df_mod[".stdresid"] <- resid(mod, type = "pearson") #calculate residuals for model
  
  resid<-ggplot(df_mod, aes(x = .fitted, y = .resid))+ #plot fitted vs residuals
    geom_point()+ #points
    geom_hline(yintercept = 0)+ #horizontal line at y = 0
    labs(title = paste("Residual vs Fitted", temp.types[i], sep = " - ")) #axis labels
  
  qq<- ggplot(df_mod, aes(sample = .stdresid)) + #qq plot
  geom_qq() +
  geom_qq_line()+ #add points and lines
    labs(title = paste("QQ Plot", temp.types[i], sep = " - ")) #axis labels
  
  hist<- ggplot(df_mod, aes(x = .resid))+
    geom_histogram()+ #draw histogram of residuals
    labs(title = paste(temp.types[i], "= sweMax+sweZeroDoy+meltDays+summerAirtemp+(1|source)+(1|site)+(1|year)", sep = " "))+ #axis labels
    theme(title = element_text(size = 10))
    
  
  st.plots[[i]]<-(resid|qq)/hist #store residual + qq plot in plots list
  
  print(paste("finished", temp.types[i], sep = " "))
}
## [1] "finished t_xbar"
## [1] "finished t_max"
## [1] "finished t_min"
## [1] "finished t_range"

Check diagnostic plots:

st.plots[[1]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

st.plots[[2]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

st.plots[[3]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

st.plots[[4]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Same problems as previous models - qq plots look skewed, non-normal distribution.

IV. Model visualizations

Data Prep:

pred_data <- temp_pivot%>%
  pivot_longer(!c(year:transform, yr_scale), names_to = "pred_name", values_to = "pred_value")

T Min:

  ggplot(filter(pred_data, temp_type == "t_min"),aes(x = pred_value, y = stream_temp, color = source))+
  geom_point(alpha = 0.4)+
  geom_smooth(method = "lm")+
  facet_wrap(~pred_name, scales = "free")+
  theme_classic()+
  labs(x = "Year", y = "Aug. 1-30 Minimum daily temperature, C", color = "Source")+
  scale_color_manual(values = source.pal, labels = source.names)+
  theme(legend.position = "bottom", 
        strip.text = element_text(size = 10, face = "bold"), 
        axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10, face = "bold"))
## `geom_smooth()` using formula 'y ~ x'

T Xbar:

  ggplot(filter(pred_data, temp_type == "t_xbar"),aes(x = pred_value, y = stream_temp, color = source))+
  geom_point(alpha = 0.4)+
  geom_smooth(method = "lm")+
  facet_wrap(~pred_name, scales = "free")+
  theme_classic()+
  labs(x = "Year", y = "Aug. 1-30 mean daily temperature, C", color = "Source")+
  scale_color_manual(values = source.pal, labels = source.names)+
  theme(legend.position = "bottom", 
        strip.text = element_text(size = 10, face = "bold"), 
        axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10, face = "bold"))
## `geom_smooth()` using formula 'y ~ x'

T Max:

  ggplot(filter(pred_data, temp_type == "t_max"),aes(x = pred_value, y = stream_temp, color = source))+
  geom_point(alpha = 0.4)+
  geom_smooth(method = "lm")+
  facet_wrap(~pred_name, scales = "free")+
  theme_classic()+
  labs(x = "Year", y = "Aug. 1-30 maximum daily temperature, C", color = "Source")+
  scale_color_manual(values = source.pal, labels = source.names)+
  theme(legend.position = "bottom", 
        strip.text = element_text(size = 10, face = "bold"), 
        axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10, face = "bold"))
## `geom_smooth()` using formula 'y ~ x'

T range:

  ggplot(filter(pred_data, temp_type == "t_range"),aes(x = pred_value, y = stream_temp, color = source))+
  geom_point(alpha = 0.4)+
  geom_smooth(method = "lm")+
  facet_wrap(~pred_name, scales = "free")+
  theme_classic()+
  labs(x = "Year", y = "Aug. 1-30 daily temperature change, C", color = "Source")+
  scale_color_manual(values = source.pal, labels = source.names)+
  theme(legend.position = "bottom", 
        strip.text = element_text(size = 10, face = "bold"), 
        axis.text = element_text(size = 10), 
        axis.title = element_text(size = 10, face = "bold"))
## `geom_smooth()` using formula 'y ~ x'

V. Overall Summary:

  1. Across all sites and sources, stream temperatures have increased significantly over time.
  2. These increases become more nuanced when modeling each source separately. Snowmelt fed streams have undergone the most change, followed by glacier fed streams and then subterranan ice streams. Minimum temperatures have increased significantly in all three source types.
  3. Across all sites and sources, larger snowpacks (higher maximum swe) are associated with lower average and minimum August stream temperatures.